Python 3
# Discovering the Higgs boson signal from background noise
import numpy as np
import pandas as pd

# Read training.csv into memory as a DataFrame.
df = pd.read_csv('training.csv')

# The Label column holds two distinct values, marking signal ('s') and
# background noise ('b').
df['Label'].unique()
# Out: array(['s', 'b'], dtype=object)
#replacing string with numerical values to run numerical methodsx = df['Label'].str.replace('b', '0', regex=False)x = x.str.replace('s', '1', regex=False)df['Label'] = x#converting label to intdf['Label'] = df.apply(lambda x: int(x[-1]), axis=1)df.drop('EventId', axis=1).describe()| DER_mass_MMC | DER_mass_transverse_met_lep | DER_mass_vis | DER_pt_h | DER_deltaeta_jet_jet | DER_mass_jet_jet | DER_prodeta_jet_jet | DER_deltar_tau_lep | DER_pt_tot | DER_sum_pt | ... | PRI_jet_num | PRI_jet_leading_pt | PRI_jet_leading_eta | PRI_jet_leading_phi | PRI_jet_subleading_pt | PRI_jet_subleading_eta | PRI_jet_subleading_phi | PRI_jet_all_pt | Weight | Label | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | ... | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 | 250000.000000 |
| mean | -49.023079 | 49.239819 | 81.181982 | 57.895962 | -708.420675 | -601.237051 | -709.356603 | 2.373100 | 18.917332 | 158.432217 | ... | 0.979176 | -348.329567 | -399.254314 | -399.259788 | -692.381204 | -709.121609 | -709.118631 | 73.064591 | 1.646767 | 0.342668 |
| std | 406.345647 | 35.344886 | 40.828691 | 63.655682 | 454.480565 | 657.972302 | 453.019877 | 0.782911 | 22.273494 | 115.706115 | ... | 0.977426 | 532.962789 | 489.338286 | 489.333883 | 479.875496 | 453.384624 | 453.389017 | 98.015662 | 1.875103 | 0.474603 |
| min | -999.000000 | 0.000000 | 6.329000 | 0.000000 | -999.000000 | -999.000000 | -999.000000 | 0.208000 | 0.000000 | 46.104000 | ... | 0.000000 | -999.000000 | -999.000000 | -999.000000 | -999.000000 | -999.000000 | -999.000000 | 0.000000 | 0.001502 | 0.000000 |
| 25% | 78.100750 | 19.241000 | 59.388750 | 14.068750 | -999.000000 | -999.000000 | -999.000000 | 1.810000 | 2.841000 | 77.550000 | ... | 0.000000 | -999.000000 | -999.000000 | -999.000000 | -999.000000 | -999.000000 | -999.000000 | 0.000000 | 0.018636 | 0.000000 |
| 50% | 105.012000 | 46.524000 | 73.752000 | 38.467500 | -999.000000 | -999.000000 | -999.000000 | 2.491500 | 12.315500 | 120.664500 | ... | 1.000000 | 38.960000 | -1.872000 | -2.093000 | -999.000000 | -999.000000 | -999.000000 | 40.512500 | 1.156188 | 0.000000 |
| 75% | 130.606250 | 73.598000 | 92.259000 | 79.169000 | 0.490000 | 83.446000 | -4.593000 | 2.961000 | 27.591000 | 200.478250 | ... | 2.000000 | 75.349000 | 0.433000 | 0.503000 | 33.703000 | -2.457000 | -2.275000 | 109.933750 | 2.404128 | 1.000000 |
| max | 1192.026000 | 690.075000 | 1349.351000 | 2834.999000 | 8.503000 | 4974.979000 | 16.690000 | 5.684000 | 2834.999000 | 1852.462000 | ... | 3.000000 | 1120.573000 | 4.499000 | 3.141000 | 721.456000 | 4.500000 | 3.142000 | 1633.433000 | 7.822543 | 1.000000 |
8 rows × 32 columns
import matplotlib.pyplot as pltimport seaborn as sns%matplotlib inlinex
plt.figure(figsize=(18,14))sns.heatmap(df.drop(['EventId'], axis=1).corr(), cmap='coolwarm')plt.title('Correlation matrix', fontsize=24)Text(0.5, 1, 'Correlation matrix')
# RobustScaler is chosen because it copes well with outliers.
from sklearn.preprocessing import RobustScaler

# Scale every column except the event id, the event weight and the target.
feature_frame = df.drop(columns=['EventId', 'Weight', 'Label'])

scaler = RobustScaler()
scaler.fit(feature_frame)
# Out: RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
#                   with_scaling=True)
scaled_data = scaler.transform(feature_frame)

from sklearn.decomposition import PCA

# Trial run of PCA with two components.
pca = PCA(n_components=2)
pca.fit(scaled_data)
# Out: PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
#          svd_solver='auto', tol=0.0, whiten=False)
pca_data = pca.transform(scaled_data)

# The first principal component accounts for 79.9% of the total variance.
pca.explained_variance_ratio_
# Out: array([0.79941427, 0.07158938])
# Fit one 12-component PCA. The earlier loop refitted PCA for every
# n_components in 0..12 and only the final (12-component) fit was ever used,
# so the other twelve fits were wasted work on the full data set.
pca = PCA(n_components=12)
pca.fit(scaled_data)
pca_data = pca.transform(scaled_data)

pca.explained_variance_ratio_
# Out: array([0.79941427, 0.07158938, 0.02945643, 0.02295639, 0.01351438,
#             0.00996207, 0.00880798, 0.0084082 , 0.00619115, 0.00534548,
#             0.00445186, 0.00405008])

# Cumulative share of variance explained by the first k components
# (entry k-1 corresponds to k components).
# Per the output below: just under 95% with 6 components, just over 95%
# with 7, and about 98.4% with all 12.
var_sum = np.cumsum(pca.explained_variance_ratio_).tolist()
var_sum
# Out: [0.7994142682423108, 0.8710036465167198, 0.9004600789164949, 0.9234164705934974, 0.9369308505878781, 0.946892924605498, 0.9557009032955126, 0.964109108282042, 0.9703002544228476, 0.9756457300864789, 0.9800975889437665, 0.9841476721518224]
# Plot the cumulative explained variance against the number of PCA components.
plt.figure(figsize=(14, 10))
plt.grid(linestyle='-', linewidth=1)
plt.xticks(range(0, 20))
plt.yticks(np.arange(0, 1, step=0.03))
plt.xlabel('Dimensions (PCA - n-components)')
plt.ylabel('Variance')
plt.xlim(0.0, 13)
plt.ylim(0.77, 1)
# The x positions are derived from var_sum rather than a hard-coded
# range(1, 13), so the plot keeps working if the number of components changes.
component_counts = range(1, len(var_sum) + 1)
plt.plot(component_counts, var_sum, color='blue', marker='o', linestyle='dashed',
         linewidth=2, markersize=10, markerfacecolor='red')
# Out: [<matplotlib.lines.Line2D at 0x7f769315d630>]
pca.components_.shape(12, 30)
#this dataframe is the correlation of each feature with each principal componentdf_comp = pd.DataFrame(pca.components_, columns=df.drop(['EventId', 'Weight', 'Label'], axis=1).columns)df_comp| DER_mass_MMC | DER_mass_transverse_met_lep | DER_mass_vis | DER_pt_h | DER_deltaeta_jet_jet | DER_mass_jet_jet | DER_prodeta_jet_jet | DER_deltar_tau_lep | DER_pt_tot | DER_sum_pt | ... | PRI_met_phi | PRI_met_sumet | PRI_jet_num | PRI_jet_leading_pt | PRI_jet_leading_eta | PRI_jet_leading_phi | PRI_jet_subleading_pt | PRI_jet_subleading_eta | PRI_jet_subleading_phi | PRI_jet_all_pt | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.995349 | -0.038413 | 0.027727 | 0.026647 | 0.010302 | 0.013589 | 0.010314 | 0.019463 | 0.006051 | 0.026359 | ... | 0.000564 | 0.027486 | 0.014824 | 0.016759 | 0.016302 | 0.016300 | 0.010548 | 0.010305 | 0.010303 | 0.023016 |
| 1 | -0.054560 | -0.018467 | -0.014592 | 0.365796 | 0.145052 | 0.192638 | 0.145342 | -0.163384 | 0.167182 | 0.377842 | ... | 0.001444 | 0.340174 | 0.168647 | 0.142420 | 0.129454 | 0.129445 | 0.150277 | 0.145148 | 0.145121 | 0.352807 |
| 2 | -0.017486 | 0.204188 | 0.688189 | -0.032016 | -0.036906 | -0.051953 | -0.036880 | 0.198528 | -0.001672 | 0.061692 | ... | -0.001873 | 0.018460 | -0.039027 | -0.034266 | -0.034288 | -0.034284 | -0.037454 | -0.036887 | -0.036881 | -0.043294 |
| 3 | -0.023935 | -0.080422 | 0.399547 | -0.011000 | -0.030274 | -0.032693 | -0.030394 | 0.076708 | -0.029584 | 0.038241 | ... | 0.000258 | 0.048186 | -0.036338 | -0.028207 | -0.029022 | -0.029015 | -0.030834 | -0.030313 | -0.030310 | -0.034375 |
| 4 | 0.040622 | 0.067508 | -0.244816 | 0.295481 | -0.250106 | -0.305083 | -0.250444 | -0.197506 | 0.004540 | 0.018443 | ... | -0.000262 | 0.032832 | -0.204747 | -0.086430 | -0.101232 | -0.101224 | -0.251974 | -0.250173 | -0.250133 | -0.047927 |
| 5 | 0.016219 | 0.093117 | 0.185329 | -0.067361 | -0.021505 | -0.040592 | -0.021261 | 0.226532 | 0.780652 | -0.072879 | ... | -0.002002 | 0.050763 | -0.003538 | -0.070937 | -0.069324 | -0.069312 | -0.019187 | -0.021407 | -0.021411 | 0.022604 |
| 6 | 0.000684 | 0.006224 | -0.013188 | -0.020556 | 0.007366 | 0.009416 | 0.007389 | -0.003352 | 0.066664 | 0.006039 | ... | -0.001856 | 0.012288 | -0.002714 | -0.023809 | -0.024537 | -0.024207 | 0.007891 | 0.007248 | 0.007370 | 0.003812 |
| 7 | 0.017312 | 0.022180 | 0.259510 | 0.137755 | 0.040645 | 0.059896 | 0.040490 | 0.197480 | -0.519610 | -0.111481 | ... | -0.000429 | -0.163281 | 0.023878 | 0.077593 | 0.077580 | 0.077595 | 0.037895 | 0.040571 | 0.040574 | -0.025283 |
| 8 | 0.037111 | 0.223778 | -0.157950 | -0.119322 | 0.163851 | 0.228257 | 0.163933 | -0.032647 | -0.075146 | -0.010722 | ... | -0.013782 | -0.043541 | -0.119344 | -0.412197 | -0.426591 | -0.426564 | 0.167048 | 0.163873 | 0.163839 | -0.057361 |
| 9 | -0.000234 | 0.004843 | -0.002665 | -0.000099 | -0.001063 | -0.002424 | -0.001063 | 0.001844 | 0.006747 | -0.000745 | ... | -0.033571 | -0.006555 | 0.003600 | 0.006860 | 0.007379 | 0.007285 | -0.001193 | -0.001070 | -0.001080 | -0.001630 |
| 10 | -0.003144 | -0.059963 | 0.009251 | 0.019069 | -0.000016 | 0.003380 | -0.000024 | 0.017726 | -0.002938 | 0.009200 | ... | 0.979117 | 0.004438 | -0.013002 | -0.025541 | -0.027976 | -0.028630 | 0.000532 | -0.000025 | -0.000160 | 0.012677 |
| 11 | 0.035328 | 0.623509 | -0.108753 | -0.171527 | 0.051418 | 0.016196 | 0.051593 | -0.226019 | 0.127588 | -0.197955 | ... | 0.072189 | -0.231311 | 0.140567 | 0.236743 | 0.270129 | 0.270041 | 0.041431 | 0.051427 | 0.051407 | -0.279006 |
12 rows × 30 columns
# Heatmap of the component matrix (rows: components, columns: features).
fig, ax = plt.subplots(figsize=(16, 12))
sns.heatmap(df_comp, cmap='plasma', ax=ax)
# Out: <matplotlib.axes._subplots.AxesSubplot at 0x7f7693d795c0>
# t-SNE
# The import was garbled as "TSNEx" (stray trailing character from the
# notebook export); sklearn.manifold exposes the class as TSNE.
from sklearn.manifold import TSNE

# Configure t-SNE with 3 output components. verbose=1 makes the fit print
# its progress log.
tsne = TSNE(n_components=3, n_iter=250, verbose=1, n_iter_without_progress=50)

# Input to t-SNE: the PCA-reduced data.
pca_data.shape
# Out: (250000, 12)
import time#measuring elapsed time to transform tsne modeltime_start = time.time()tsne_arr = tsne.fit_transform(pca_data)print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))[t-SNE] Computing 91 nearest neighbors... [t-SNE] Indexed 250000 samples in 0.285s... [t-SNE] Computed neighbors for 250000 samples in 138.546s... [t-SNE] Computed conditional probabilities for sample 1000 / 250000 [t-SNE] Computed conditional probabilities for sample 2000 / 250000 [t-SNE] Computed conditional probabilities for sample 3000 / 250000 [t-SNE] Computed conditional probabilities for sample 4000 / 250000 [t-SNE] Computed conditional probabilities for sample 5000 / 250000 [t-SNE] Computed conditional probabilities for sample 6000 / 250000 [t-SNE] Computed conditional probabilities for sample 7000 / 250000 [t-SNE] Computed conditional probabilities for sample 8000 / 250000 [t-SNE] Computed conditional probabilities for sample 9000 / 250000 [t-SNE] Computed conditional probabilities for sample 10000 / 250000 [t-SNE] Computed conditional probabilities for sample 11000 / 250000 [t-SNE] Computed conditional probabilities for sample 12000 / 250000 [t-SNE] Computed conditional probabilities for sample 13000 / 250000 [t-SNE] Computed conditional probabilities for sample 14000 / 250000 [t-SNE] Computed conditional probabilities for sample 15000 / 250000 [t-SNE] Computed conditional probabilities for sample 16000 / 250000 [t-SNE] Computed conditional probabilities for sample 17000 / 250000 [t-SNE] Computed conditional probabilities for sample 18000 / 250000 [t-SNE] Computed conditional probabilities for sample 19000 / 250000 [t-SNE] Computed conditional probabilities for sample 20000 / 250000 [t-SNE] Computed conditional probabilities for sample 21000 / 250000 [t-SNE] Computed conditional probabilities for sample 22000 / 250000 [t-SNE] Computed conditional probabilities for sample 23000 / 250000 [t-SNE] Computed conditional probabilities for sample 24000 / 250000 [t-SNE] 
Computed conditional probabilities for sample 25000 / 250000 [t-SNE] Computed conditional probabilities for sample 26000 / 250000 [t-SNE] Computed conditional probabilities for sample 27000 / 250000 [t-SNE] Computed conditional probabilities for sample 28000 / 250000 [t-SNE] Computed conditional probabilities for sample 29000 / 250000 [t-SNE] Computed conditional probabilities for sample 30000 / 250000 [t-SNE] Computed conditional probabilities for sample 31000 / 250000 [t-SNE] Computed conditional probabilities for sample 32000 / 250000 [t-SNE] Computed conditional probabilities for sample 33000 / 250000 [t-SNE] Computed conditional probabilities for sample 34000 / 250000 [t-SNE] Computed conditional probabilities for sample 35000 / 250000 [t-SNE] Computed conditional probabilities for sample 36000 / 250000 [t-SNE] Computed conditional probabilities for sample 37000 / 250000 [t-SNE] Computed conditional probabilities for sample 38000 / 250000 [t-SNE] Computed conditional probabilities for sample 39000 / 250000 [t-SNE] Computed conditional probabilities for sample 40000 / 250000 [t-SNE] Computed conditional probabilities for sample 41000 / 250000 [t-SNE] Computed conditional probabilities for sample 42000 / 250000 [t-SNE] Computed conditional probabilities for sample 43000 / 250000 [t-SNE] Computed conditional probabilities for sample 44000 / 250000 [t-SNE] Computed conditional probabilities for sample 45000 / 250000 [t-SNE] Computed conditional probabilities for sample 46000 / 250000 [t-SNE] Computed conditional probabilities for sample 47000 / 250000 [t-SNE] Computed conditional probabilities for sample 48000 / 250000 [t-SNE] Computed conditional probabilities for sample 49000 / 250000 [t-SNE] Computed conditional probabilities for sample 50000 / 250000 [t-SNE] Computed conditional probabilities for sample 51000 / 250000 [t-SNE] Computed conditional probabilities for sample 52000 / 250000 [t-SNE] Computed conditional probabilities for sample 53000 / 250000 
[t-SNE] Computed conditional probabilities for sample 54000 / 250000 [t-SNE] Computed conditional probabilities for sample 55000 / 250000 [t-SNE] Computed conditional probabilities for sample 56000 / 250000 [t-SNE] Computed conditional probabilities for sample 57000 / 250000 [t-SNE] Computed conditional probabilities for sample 58000 / 250000 [t-SNE] Computed conditional probabilities for sample 59000 / 250000 [t-SNE] Computed conditional probabilities for sample 60000 / 250000 [t-SNE] Computed conditional probabilities for sample 61000 / 250000 [t-SNE] Computed conditional probabilities for sample 62000 / 250000 [t-SNE] Computed conditional probabilities for sample 63000 / 250000 [t-SNE] Computed conditional probabilities for sample 64000 / 250000 [t-SNE] Computed conditional probabilities for sample 65000 / 250000 [t-SNE] Computed conditional probabilities for sample 66000 / 250000 [t-SNE] Computed conditional probabilities for sample 67000 / 250000 [t-SNE] Computed conditional probabilities for sample 68000 / 250000 [t-SNE] Computed conditional probabilities for sample 69000 / 250000 [t-SNE] Computed conditional probabilities for sample 70000 / 250000 [t-SNE] Computed conditional probabilities for sample 71000 / 250000 [t-SNE] Computed conditional probabilities for sample 72000 / 250000 [t-SNE] Computed conditional probabilities for sample 73000 / 250000 [t-SNE] Computed conditional probabilities for sample 74000 / 250000 [t-SNE] Computed conditional probabilities for sample 75000 / 250000 [t-SNE] Computed conditional probabilities for sample 76000 / 250000 [t-SNE] Computed conditional probabilities for sample 77000 / 250000 [t-SNE] Computed conditional probabilities for sample 78000 / 250000 [t-SNE] Computed conditional probabilities for sample 79000 / 250000 [t-SNE] Computed conditional probabilities for sample 80000 / 250000 [t-SNE] Computed conditional probabilities for sample 81000 / 250000 [t-SNE] Computed conditional probabilities for sample 82000 / 
250000 [t-SNE] Computed conditional probabilities for sample 83000 / 250000 [t-SNE] Computed conditional probabilities for sample 84000 / 250000 [t-SNE] Computed conditional probabilities for sample 85000 / 250000 [t-SNE] Computed conditional probabilities for sample 86000 / 250000 [t-SNE] Computed conditional probabilities for sample 87000 / 250000 [t-SNE] Computed conditional probabilities for sample 88000 / 250000 [t-SNE] Computed conditional probabilities for sample 89000 / 250000 [t-SNE] Computed conditional probabilities for sample 90000 / 250000 [t-SNE] Computed conditional probabilities for sample 91000 / 250000 [t-SNE] Computed conditional probabilities for sample 92000 / 250000 [t-SNE] Computed conditional probabilities for sample 93000 / 250000 [t-SNE] Computed conditional probabilities for sample 94000 / 250000 [t-SNE] Computed conditional probabilities for sample 95000 / 250000 [t-SNE] Computed conditional probabilities for sample 96000 / 250000 [t-SNE] Computed conditional probabilities for sample 97000 / 250000 [t-SNE] Computed conditional probabilities for sample 98000 / 250000 [t-SNE] Computed conditional probabilities for sample 99000 / 250000 [t-SNE] Computed conditional probabilities for sample 100000 / 250000 [t-SNE] Computed conditional probabilities for sample 101000 / 250000 [t-SNE] Computed conditional probabilities for sample 102000 / 250000 [t-SNE] Computed conditional probabilities for sample 103000 / 250000 [t-SNE] Computed conditional probabilities for sample 104000 / 250000 [t-SNE] Computed conditional probabilities for sample 105000 / 250000 [t-SNE] Computed conditional probabilities for sample 106000 / 250000 [t-SNE] Computed conditional probabilities for sample 107000 / 250000 [t-SNE] Computed conditional probabilities for sample 108000 / 250000 [t-SNE] Computed conditional probabilities for sample 109000 / 250000 [t-SNE] Computed conditional probabilities for sample 110000 / 250000 [t-SNE] Computed conditional probabilities for 
sample 111000 / 250000 [t-SNE] Computed conditional probabilities for sample 112000 / 250000 [t-SNE] Computed conditional probabilities for sample 113000 / 250000 [t-SNE] Computed conditional probabilities for sample 114000 / 250000 [t-SNE] Computed conditional probabilities for sample 115000 / 250000 [t-SNE] Computed conditional probabilities for sample 116000 / 250000 [t-SNE] Computed conditional probabilities for sample 117000 / 250000 [t-SNE] Computed conditional probabilities for sample 118000 / 250000 [t-SNE] Computed conditional probabilities for sample 119000 / 250000 [t-SNE] Computed conditional probabilities for sample 120000 / 250000 [t-SNE] Computed conditional probabilities for sample 121000 / 250000 [t-SNE] Computed conditional probabilities for sample 122000 / 250000 [t-SNE] Computed conditional probabilities for sample 123000 / 250000 [t-SNE] Computed conditional probabilities for sample 124000 / 250000 [t-SNE] Computed conditional probabilities for sample 125000 / 250000 [t-SNE] Computed conditional probabilities for sample 126000 / 250000
[t-SNE] Computed conditional probabilities for sample 127000 / 250000 [t-SNE] Computed conditional probabilities for sample 128000 / 250000 [t-SNE] Computed conditional probabilities for sample 129000 / 250000 [t-SNE] Computed conditional probabilities for sample 130000 / 250000 [t-SNE] Computed conditional probabilities for sample 131000 / 250000 [t-SNE] Computed conditional probabilities for sample 132000 / 250000 [t-SNE] Computed conditional probabilities for sample 133000 / 250000 [t-SNE] Computed conditional probabilities for sample 134000 / 250000 [t-SNE] Computed conditional probabilities for sample 135000 / 250000 [t-SNE] Computed conditional probabilities for sample 136000 / 250000 [t-SNE] Computed conditional probabilities for sample 137000 / 250000 [t-SNE] Computed conditional probabilities for sample 138000 / 250000 [t-SNE] Computed conditional probabilities for sample 139000 / 250000 [t-SNE] Computed conditional probabilities for sample 140000 / 250000 [t-SNE] Computed conditional probabilities for sample 141000 / 250000 [t-SNE] Computed conditional probabilities for sample 142000 / 250000 [t-SNE] Computed conditional probabilities for sample 143000 / 250000 [t-SNE] Computed conditional probabilities for sample 144000 / 250000 [t-SNE] Computed conditional probabilities for sample 145000 / 250000 [t-SNE] Computed conditional probabilities for sample 146000 / 250000 [t-SNE] Computed conditional probabilities for sample 147000 / 250000 [t-SNE] Computed conditional probabilities for sample 148000 / 250000 [t-SNE] Computed conditional probabilities for sample 149000 / 250000 [t-SNE] Computed conditional probabilities for sample 150000 / 250000 [t-SNE] Computed conditional probabilities for sample 151000 / 250000 [t-SNE] Computed conditional probabilities for sample 152000 / 250000 [t-SNE] Computed conditional probabilities for sample 153000 / 250000 [t-SNE] Computed conditional probabilities for sample 154000 / 250000 [t-SNE] Computed conditional 
probabilities for sample 155000 / 250000 [t-SNE] Computed conditional probabilities for sample 156000 / 250000 [t-SNE] Computed conditional probabilities for sample 157000 / 250000 [t-SNE] Computed conditional probabilities for sample 158000 / 250000 [t-SNE] Computed conditional probabilities for sample 159000 / 250000 [t-SNE] Computed conditional probabilities for sample 160000 / 250000 [t-SNE] Computed conditional probabilities for sample 161000 / 250000 [t-SNE] Computed conditional probabilities for sample 162000 / 250000 [t-SNE] Computed conditional probabilities for sample 163000 / 250000 [t-SNE] Computed conditional probabilities for sample 164000 / 250000 [t-SNE] Computed conditional probabilities for sample 165000 / 250000 [t-SNE] Computed conditional probabilities for sample 166000 / 250000 [t-SNE] Computed conditional probabilities for sample 167000 / 250000 [t-SNE] Computed conditional probabilities for sample 168000 / 250000 [t-SNE] Computed conditional probabilities for sample 169000 / 250000 [t-SNE] Computed conditional probabilities for sample 170000 / 250000 [t-SNE] Computed conditional probabilities for sample 171000 / 250000 [t-SNE] Computed conditional probabilities for sample 172000 / 250000 [t-SNE] Computed conditional probabilities for sample 173000 / 250000 [t-SNE] Computed conditional probabilities for sample 174000 / 250000 [t-SNE] Computed conditional probabilities for sample 175000 / 250000 [t-SNE] Computed conditional probabilities for sample 176000 / 250000 [t-SNE] Computed conditional probabilities for sample 177000 / 250000 [t-SNE] Computed conditional probabilities for sample 178000 / 250000 [t-SNE] Computed conditional probabilities for sample 179000 / 250000 [t-SNE] Computed conditional probabilities for sample 180000 / 250000 [t-SNE] Computed conditional probabilities for sample 181000 / 250000 [t-SNE] Computed conditional probabilities for sample 182000 / 250000 [t-SNE] Computed conditional probabilities for sample 183000 / 
250000 [t-SNE] Computed conditional probabilities for sample 184000 / 250000 [t-SNE] Computed conditional probabilities for sample 185000 / 250000 [t-SNE] Computed conditional probabilities for sample 186000 / 250000 [t-SNE] Computed conditional probabilities for sample 187000 / 250000 [t-SNE] Computed conditional probabilities for sample 188000 / 250000 [t-SNE] Computed conditional probabilities for sample 189000 / 250000 [t-SNE] Computed conditional probabilities for sample 190000 / 250000 [t-SNE] Computed conditional probabilities for sample 191000 / 250000 [t-SNE] Computed conditional probabilities for sample 192000 / 250000 [t-SNE] Computed conditional probabilities for sample 193000 / 250000 [t-SNE] Computed conditional probabilities for sample 194000 / 250000 [t-SNE] Computed conditional probabilities for sample 195000 / 250000 [t-SNE] Computed conditional probabilities for sample 196000 / 250000 [t-SNE] Computed conditional probabilities for sample 197000 / 250000 [t-SNE] Computed conditional probabilities for sample 198000 / 250000 [t-SNE] Computed conditional probabilities for sample 199000 / 250000 [t-SNE] Computed conditional probabilities for sample 200000 / 250000 [t-SNE] Computed conditional probabilities for sample 201000 / 250000 [t-SNE] Computed conditional probabilities for sample 202000 / 250000 [t-SNE] Computed conditional probabilities for sample 203000 / 250000 [t-SNE] Computed conditional probabilities for sample 204000 / 250000 [t-SNE] Computed conditional probabilities for sample 205000 / 250000 [t-SNE] Computed conditional probabilities for sample 206000 / 250000 [t-SNE] Computed conditional probabilities for sample 207000 / 250000 [t-SNE] Computed conditional probabilities for sample 208000 / 250000 [t-SNE] Computed conditional probabilities for sample 209000 / 250000 [t-SNE] Computed conditional probabilities for sample 210000 / 250000 [t-SNE] Computed conditional probabilities for sample 211000 / 250000 [t-SNE] Computed conditional 
probabilities for sample 212000 / 250000 [t-SNE] Computed conditional probabilities for sample 213000 / 250000 [t-SNE] Computed conditional probabilities for sample 214000 / 250000 [t-SNE] Computed conditional probabilities for sample 215000 / 250000 [t-SNE] Computed conditional probabilities for sample 216000 / 250000 [t-SNE] Computed conditional probabilities for sample 217000 / 250000 [t-SNE] Computed conditional probabilities for sample 218000 / 250000 [t-SNE] Computed conditional probabilities for sample 219000 / 250000 [t-SNE] Computed conditional probabilities for sample 220000 / 250000 [t-SNE] Computed conditional probabilities for sample 221000 / 250000 [t-SNE] Computed conditional probabilities for sample 222000 / 250000 [t-SNE] Computed conditional probabilities for sample 223000 / 250000 [t-SNE] Computed conditional probabilities for sample 224000 / 250000 [t-SNE] Computed conditional probabilities for sample 225000 / 250000 [t-SNE] Computed conditional probabilities for sample 226000 / 250000 [t-SNE] Computed conditional probabilities for sample 227000 / 250000 [t-SNE] Computed conditional probabilities for sample 228000 / 250000 [t-SNE] Computed conditional probabilities for sample 229000 / 250000 [t-SNE] Computed conditional probabilities for sample 230000 / 250000 [t-SNE] Computed conditional probabilities for sample 231000 / 250000 [t-SNE] Computed conditional probabilities for sample 232000 / 250000 [t-SNE] Computed conditional probabilities for sample 233000 / 250000 [t-SNE] Computed conditional probabilities for sample 234000 / 250000 [t-SNE] Computed conditional probabilities for sample 235000 / 250000 [t-SNE] Computed conditional probabilities for sample 236000 / 250000 [t-SNE] Computed conditional probabilities for sample 237000 / 250000 [t-SNE] Computed conditional probabilities for sample 238000 / 250000 [t-SNE] Computed conditional probabilities for sample 239000 / 250000 [t-SNE] Computed conditional probabilities for sample 240000 / 
250000 [t-SNE] Computed conditional probabilities for sample 241000 / 250000 [t-SNE] Computed conditional probabilities for sample 242000 / 250000 [t-SNE] Computed conditional probabilities for sample 243000 / 250000 [t-SNE] Computed conditional probabilities for sample 244000 / 250000 [t-SNE] Computed conditional probabilities for sample 245000 / 250000 [t-SNE] Computed conditional probabilities for sample 246000 / 250000 [t-SNE] Computed conditional probabilities for sample 247000 / 250000 [t-SNE] Computed conditional probabilities for sample 248000 / 250000 [t-SNE] Computed conditional probabilities for sample 249000 / 250000 [t-SNE] Computed conditional probabilities for sample 250000 / 250000 [t-SNE] Mean sigma: 0.387824
[t-SNE] KL divergence after 50 iterations with early exaggeration: 125.084518 [t-SNE] KL divergence after 250 iterations: 5.860478 t-SNE done! Time elapsed: 6838.500510692596 seconds
# Shape of the t-SNE output.
tsne_arr.shape
# Out: (250000, 3)

# Collect the three components obtained from t-SNE feature extraction into a
# DataFrame, then attach the last column of df as the label.
tsne_df = pd.DataFrame({
    'tsne_1': tsne_arr[:, 0],
    'tsne_2': tsne_arr[:, 1],
    'tsne_3': tsne_arr[:, 2],
})
tsne_df['label'] = df.iloc[:, -1]

import plotly.express as px

# NOTE(review): this rebinds tsne_df to the contents of tsne_data.csv and
# discards the frame built above. No visible code writes that file;
# presumably it was saved from an earlier run to skip the long t-SNE fit.
# Confirm.
tsne_df = pd.read_csv('tsne_data.csv')